
***********************************************
* (1.0) Import gapminder data 
***********************************************
* Load the Gapminder population sheet; firstrow takes variable names from row 1
import excel `"$d_data\population-gapminder-v6.xlsx"', ///
    sheet("data-for-countries-etc-by-year") firstrow clear

* Harmonise variable names in a single grouped rename
rename (name time Population) (country year population)

* Remove years after 2020 (a missing year also counts as > 2020 and is removed),
* then retain only the variables used below
drop if year > 2020
keep country year population

* Yearly world total and each country's percentage share of that total
egen world_pop = total(population), by(year)
gen  pop_share = 100 * population / world_pop

* Stored for the later country-year merge with the fertility data
save `"$d_data\gapminder_pop_merge"', replace

* Load the Gapminder TFR sheet; firstrow takes variable names from row 1
import excel `"$d_data\tfr-by-gapminder_v12.xlsx"', ///
    sheet("countries_and_territories") firstrow clear

* Only the country name and the per-year columns (names starting with y) are needed
keep geoname y*

* Wide (one y<year> column per year) to long: one row per country-year
reshape long y, i(geoname) j(year)
rename (y geoname) (tfr country)

* Discard rows lacking a year or a country name, and all years from 2020 onwards
drop if missing(year) | country == "" | year >= 2020

* Drop countries with completely missing TFR history:
* count() tallies non-missing values, so zero means every year is missing
egen n_tfr = count(tfr), by(country)
drop if n_tfr == 0
drop n_tfr


***********************************************
* (2.0) Find years each country takes to go from TFR6 to TFR3
***********************************************

* Last year with TFR >= 6 for each country.
* Stata orders missing values above every number, so "tfr >= 6" on its own is
* true when tfr is missing; such years must be excluded explicitly or a year
* with no TFR data could be picked as the last high-fertility year.
egen last_6 = max(cond(tfr >= 6 & !missing(tfr), year, .)), by(country)

* First year with TFR <= 3 for each country, counted only after last_6 so the
* transition is measured forward in time (an earlier dip below 3 would otherwise
* produce a negative duration). If last_6 is missing, "year > last_6" is never
* true, so early_3 (and hence duration) stays missing for that country.
egen early_3 = min(cond(tfr <= 3 & year > last_6, year, .)), by(country)

* Duration of transition in years (missing when either endpoint is missing)
gen duration = early_3 - last_6

***********************************************
* (3.0) Merge with population and save transition data
***********************************************
* Attach population and world share by country-year.
* keep(match) retains only matched observations (_merge == 3) and
* nogenerate skips creating the _merge variable altogether.
merge m:1 country year using `"$d_data\gapminder_pop_merge"', ///
    keepusing(population pop_share) keep(match) nogenerate

* Retain only the observation for the last year with TFR >= 6
keep if year == last_6
keep country last_6 early_3 duration population pop_share

* Remove countries with no computed duration, plus the problematic case
drop if missing(duration) | country == "North Korea"

* Order by transition length and store the result
sort duration
save `"$d_data\gapminder_transition"', replace


***********************************************
* (4.0) Bar chart - large countries only  
***********************************************

* Keep only countries with >= 0.25% of world population at initial transition year.
* pop_share is expressed in percent. Missing values compare as larger than any
* number in Stata, so "drop if pop_share < 0.25" would silently keep countries
* whose share is unknown; require a non-missing share instead.
keep if pop_share >= 0.25 & !missing(pop_share)

* Label: Country (last_6-early_3)
* tostring converts the year variables in place so they can be concatenated
tostring last_6 early_3, replace
gen name = country + " (" + last_6 + "-" + early_3 + ")"

* Horizontal bars, one per country label, ordered by transition duration;
* each bar is labelled with its height and drawn in black on a white background
graph hbar (mean) duration, ///
    over(name, sort(duration)) ///
    scale(0.5) ///
    stack ///
    graphregion(fcolor(white) lcolor(white)) ///
    ylabel(, nogrid angle(horizontal)) ///
    ytitle("") ///
    b1title("Years") ///
    blabel(bar) ///
    bar(1, color(black))
graph export `"$d_fig\Fig_A1_Gapminder.jpg"', replace

